This code sample is intended to be a reproducible example of downloading, processing, and visualizing climate time series across the conterminous US. In this code we will explore a monthly time series of precipitation data from the GridMET data product (http://www.climatologylab.org/gridmet.html). We will detrend these precipitation data and transform these raw precipitation data into precipitation anomalies, which is just a simple deviation from the long term mean. By transforming into anomalies we can better understand how much increase or decrease in precipitation is occurring across space and time, and how different those deviations are from the long term mean. At the very end we will extract the mean precipitation anomalies across defined regions in the conterminous US and create a series of time series plots.
First we need to setup our environment
# Packages required for this analysis
packages <- c("tidyverse", "magrittr", "raster", "RCurl", "sf", "assertthat", "lubridate", "ncdf4", "snow", "remote",
              "RColorBrewer", "rasterVis", "zoo", "gridExtra", "parallel", "doParallel", "pbapply", "velox")
# Install only the packages that are missing, then load everything once.
# (The original computed setdiff() twice and duplicated the lapply() call
# in both branches of an if/else.)
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Load all required packages; invisible() suppresses the printed list
invisible(lapply(packages, library, character.only = TRUE, quietly = TRUE))
Now lets create some folders to store our raw and processed data in…
# Directory layout: raw inputs under data/raw, outputs under data/processed
prefix <- "data"
raw_dir <- file.path(prefix, "raw")
us_dir <- file.path(raw_dir, "cb_2016_us_state_20m")
ecoregion_dir <- file.path(raw_dir, "na_cec_eco_l1")
climate_dir <- file.path(raw_dir, "climate")
processed_dir <- file.path(prefix, "processed")
processed_climate <- file.path(processed_dir, "climate")
# Create any directory that does not already exist.
# Parents are listed before children, so plain dir.create() suffices.
var_dir <- list(prefix, raw_dir, us_dir, ecoregion_dir, climate_dir, processed_dir, processed_climate)
lapply(var_dir, function(path) {
  if (!dir.exists(path)) {
    dir.create(path, showWarnings = FALSE)
  }
})
Let’s define some small functions that we can call to make things more efficient and readable throughout the code.
# Create some simple functions to use later in the code

# Download a remote file into `dir`. Zip archives are extracted into `dir`
# and the archive itself removed; any other file (e.g. a .nc) is kept as-is.
#
# Args:
#   url: character scalar, URL of the file to download.
#   dir: character scalar, existing local directory to receive the file.
download_data <- function(url, dir) {
  # Keep the remote file's own name. tools::file_ext() is robust to names
  # containing multiple dots (e.g. "foo.tar.gz"), which the previous
  # strsplit(basename(url), '[.]')[1:2] approach mangled.
  destfile <- file.path(dir, basename(url))
  # mode = "wb" so binary files (zip, NetCDF) are not corrupted on Windows
  download.file(url, destfile = destfile, mode = "wb")
  if (tolower(tools::file_ext(destfile)) == "zip") {
    unzip(destfile, exdir = dir)
    unlink(destfile) # delete the archive once its contents are extracted
  }
}
# We will be doing some plotting, so here we can define our base plots

# Faceted levelplot of a raster time series with state borders overlaid.
#
# Args:
#   input_series: RasterStack/Brick whose layers become the panels.
#   layout_vals:  c(columns, rows) panel layout for levelplot.
#   title:        text drawn above the panels (xlab.top).
#
# NOTE(review): relies on the global `usa` object defined in an earlier
# chunk -- confirm it is in scope before calling.
raster_plot <- function(input_series, layout_vals, title) {
  # One color break per unit, spanning the full data range of the series
  color_breaks <- seq(min(minValue(input_series)), max(maxValue(input_series)))
  diverging_palette <- colorRampPalette(brewer.pal(10, 'RdYlBu'))
  base_plot <- rasterVis::levelplot(
    input_series,
    layout = layout_vals,
    par.settings = list(axis.line = list(col = 'transparent')),
    scales = list(draw = FALSE), # no axis tick labels on the panels
    colorkey = TRUE,
    region = TRUE,
    col.regions = diverging_palette,
    xlab.top = title,
    at = color_breaks
  )
  # Overlay the usa state boundaries on every panel
  base_plot + layer(sp.polygons(as(usa, 'Spatial'), lwd = 2))
}
# A function that defines our hovmoller plot (space vs. time summary of a series)
# input_series: RasterStack/Brick with a time (z) dimension set (see set_names below)
# legend_range: numeric break points passed to `at` for the color key
hovmoller_plot <- function(input_series, legend_range) {
hovmoller(input_series,
at = legend_range,
panel = panel.levelplot.raster, # draw panels as raster images
interpolate = TRUE,
yscale.components = yscale.raster.subticks,
par.settings = BuRdTheme) # blue-red diverging theme from rasterVis
}
# Attach time metadata to a raster stack of consecutive monthly layers.
#
# Args:
#   x:          RasterStack/Brick, one layer per month.
#   start_date: first month as "YYYY-MM-DD".
#   end_date:   last month as "YYYY-MM-DD".
# Returns `x` with setZ() time metadata and layer names like "Jan 1979".
#
# NOTE(review): this masks purrr::set_names(), which the tidyverse attaches.
set_names <- function(x, start_date, end_date) {
  month_seq <- seq(as.Date(start_date), as.Date(end_date), by = 'month')
  month_labels <- as.yearmon(month_seq)
  x <- setZ(x, month_labels)
  # Name each layer by its month/year combination
  names(x) <- as.character(month_labels)
  x
}
# Detrend one cell's precipitation time series by removing the linear trend.
#
# Fits x ~ time and returns residuals re-centered on the fitted value at the
# first time step, so the detrended series keeps its original level but loses
# the slope. Designed to be applied per cell via raster::calc().
#
# Args:
#   x:          numeric vector, a single cell's time series.
#   time_index: time steps to regress against; defaults to seq_along(x),
#               which matches the global `time <- 1:nlayers(...)` the
#               original version read from its calling environment.
# Returns a numeric vector the same length as x (all NA for masked cells).
get_residuals <- function(x, time_index = seq_along(x)) {
  # Masked cells (outside the study area) are all NA. Checking anyNA -- not
  # just x[1] as before -- also protects against partial-NA series, where
  # lm() would drop rows and return a residual vector shorter than x,
  # breaking raster::calc().
  if (anyNA(x)) {
    return(rep(NA_real_, length(x)))
  }
  m <- lm(x ~ time_index)
  # Residuals plus the fitted value at the first time step: this removes
  # the slope while preserving the series' starting level (note: this is
  # the fitted value at t = 1, not the regression intercept).
  residuals(m) + predict(m)[1]
}
# Publication-style ggplot theme: light major grid, right-hand vertical
# legend, angled x-axis labels.
#
# Args:
#   base_size:   base font size in points.
#   base_family: base font family ("" = device default).
# Returns a ggplot2 theme object to add to a plot.
#
# NOTE(review): requires the ggthemes package, which is not in the package
# list at the top of this script -- install it separately.
theme_pub <- function(base_size = 11, base_family = "") {
  # Namespaced call instead of library() inside the function body:
  # loading packages is a side effect a plotting helper should not have.
  ggthemes::theme_foundation(base_size = base_size, base_family = base_family) +
    theme(plot.title = element_text(hjust = 0.05, size = 13),
          panel.border = element_rect(colour = NA),
          panel.background = element_rect(colour = NA),
          panel.grid.major = element_line(colour = "#f0f0f0"),
          panel.grid.minor = element_blank(),
          plot.background = element_rect(colour = NA),
          axis.line = element_line(colour = "black"),
          axis.ticks = element_line(),
          legend.title = element_text(size = 11),
          legend.position = "right",
          legend.text = element_text(size = 11),
          legend.direction = "vertical",
          legend.key = element_rect(colour = "transparent", fill = "white"),
          strip.background = element_rect(colour = NA),
          strip.text.x = element_text(size = 10),
          axis.title = element_text(size = 11),
          axis.text.x = element_text(size = 10, angle = 65, hjust = 1),
          axis.text.y = element_text(size = 11))
}
Now we can download the two data sets needed for this analysis
# USA states shapefile (Census cartographic boundary file, zipped)
# Skip the download if the shapefile is already on disk
if(!file.exists(file.path(us_dir, "cb_2016_us_state_20m.shp"))) {
download_data("https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_state_20m.zip",
us_dir)
}
# Level 1 ecoregions (EPA/CEC North America, zipped)
# Skip the download if the shapefile is already on disk
if(!file.exists(file.path(ecoregion_dir, "NA_CEC_Eco_Level1.shp"))) {
download_data("ftp://newftp.epa.gov/EPADataCommons/ORD/Ecoregions/cec_na/na_cec_eco_l1.zip",
ecoregion_dir)
}
# Monthly precipitation 1979-2017 (gridMET NetCDF; not zipped, so kept as-is)
# Skip the download if the NetCDF is already on disk
if(!file.exists(file.path(climate_dir, 'pr_gridMET.nc'))) {
download_data("https://www.northwestknowledge.net/metdata/data/monthly/pr_gridMET.nc",
climate_dir)
}
Now that the data has been downloaded let’s start processing!
First, we are going to define our study area. To do this we are going to manipulate the USA state shapefile that we downloaded previously. We first need to transform our spatial coordinate system to match the raster data. Doing this transformation on the vector file is much faster and more accurate than trying to reproject our raster time series. We defined the spatial projection earlier as WGS 84 (lat/long). We then want to make sure we just have the lower 48 states. For our final analysis we are going to look at regional variations in the data so we define those regions here. Because the sf object plays nicely with the tidyverse all of these dataframe manipulations are as easy on a spatial dataframe as they would be on a non-spatial dataframe. This is a huge improvement and difference compared to using the sp package for vector shapefiles. Using the dplyr::group_by function we can dissolve our polygons based on an attribute. In this case we can dissolve by our defined regions to remove state boundaries.
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries
Now that we have our study area defined and prepped, we are going to import the precipitation data. These data are in NetCDF format, which is simply a multidimensional data file. Contained within this one NetCDF file is the attribute (x), space (y), and time (z) components of this time series. They are highly compressible and easily distributed. This particular NetCDF cannot be read in through the raster package, but the majority of NetCDFs can be read in through the raster package, so we are going to use the ncdf4 package to accomplish this import task.
Upon importing, NetCDFs are notorious for inverted dimensions, which can make the import seem like it has failed. For instance, if we were to import the precipitation data and plot the first time step (Jan 1979), this is what the resulting raster would look like…
# Open a connection to the gridMET precipitation NetCDF
nc <- nc_open(file.path(climate_dir, 'pr_gridMET.nc'))
# Read the variable and wrap it in a RasterBrick WITHOUT transposing, to
# demonstrate the inverted-dimension problem described in the text above.
# NOTE(review): `proj` is a CRS defined in an earlier chunk not shown here.
inverted_netcdf <- attributes(nc$var)$names %>%
ncvar_get(nc, .) %>%
raster::brick(., crs= proj) # dimensions are still in raw NetCDF order
plot(inverted_netcdf[[1]]) # the first time step (Jan 1979) plots rotated
Clearly the dimensions are not displaying in the proper orientation. To correct that we are going to transpose the 3 dimensions (attribute, space, time) to reflect the correct order. One quick note/tip. You may have noticed that I am using the %>% operator. This is called a pipe and allows outputs of one task to be piped directly into the next operation without creating multiple intermediate objects. By piping, the code becomes much more readable and clean. By importing the tidyverse you have automatically loaded the magrittr package, which is part of the tidyverse and is where the piping operator lives. You can use the piping operator outside of tidyverse operations; for instance, below you will notice that we used this to pipe through the NetCDF import and into the raster::brick operator. Quite handy!
precip_series <- attributes(nc$var)$names %>%
ncvar_get(nc, .) %>%
aperm(., c(2,3,1)) %>% # Transpose the dimensions into (row, col, time) order. This takes some trial and error
raster::brick(., crs= proj) # Create a raster time series stack that we can use for analysis
extent(precip_series) <- c(-124.793, -67.043, 25.04186, 49.41686) # set the lon/lat extent of the grid
precip_series <- precip_series %>%
crop(as(study_area, 'Spatial')) %>%
mask(as(study_area, 'Spatial')) # clip to the study-area polygons (cells outside become NA); `study_area` is built in an earlier chunk
# Label every layer with its month/year, then plot the last 12 months (2017)
precip_series <- set_names(precip_series, '1979-01-01', '2017-12-01')
raster_plot(input_series = precip_series[[457:468]], layout_vals = c(4,3), title = 'Raw (mm)')
We know that there is inherent seasonality within our precipitation data. One way to remove the seasonality is to detrend the time series. To accomplish this we can simply extract the residuals from a linear regression between the series (the dependent variable) and time (the independent variable).
time <- 1:nlayers(precip_series) # Independent variable for the per-cell linear detrend
detrended_precip <- calc(precip_series, get_residuals) # Apply the detrend cell-by-cell to build the residual stack
detrended_precip <- set_names(detrended_precip, '1979-01-01', '2017-12-01')
raster_plot(input_series = detrended_precip[[457:468]], layout_vals = c(4,3), title = 'Detrended')
# Remove the mean annual cycle (12-month climatology) from the detrended series
deseasoned_precip <- remote::deseason(detrended_precip, cycle.window = 12L, use.cpp = TRUE)
deseasoned_precip <- set_names(deseasoned_precip, '1979-01-01', '2017-12-01')
raster_plot(input_series = deseasoned_precip[[457:468]], layout_vals = c(4,3), title = 'Deseasoned and detrended')
Ok, so now that we have created our detrended precipitation time series, let’s create our precipitation anomalies using those detrended data.
# Create precipitation anomalies (deviations from the long-term mean)
# from the detrended series, then label and plot the 2017 months
precip_anom <- remote::anomalize(detrended_precip)
precip_anom <- set_names(precip_anom, '1979-01-01', '2017-12-01')
raster_plot(input_series = precip_anom[[457:468]], layout_vals = c(4,3), title = 'Anomalies')
Now that all of the data are processed, how do those time series look spatially and temporally?
# Compare May 2017 (layer 461 = month 461 since Jan 1979) across the raw,
# detrended, and deseasoned+detrended series, one panel per series
may_stack <- stack(precip_series[[461]], detrended_precip[[461]], deseasoned_precip[[461]])
raster_plot(input_series = may_stack, layout_vals = c(1,3), title = 'Raw (1), Detrended (2), Deseason and Detrended (3)')
You will notice that there are differences in precipitation for the May months of 1979 and 2017, but this could be due to seasonality or external climatological forcings, like El Nino events.
Similar to the plots of the month of May, this hovmoller plot shows differences in precipitation over space and time, but there may be a lot of seasonality embedded in the time series. We will now explore the precipitation anomalies to see if those differences were from seasonal effects or if they are in fact part of a longer term trend of precipitation change.
# Same May 2017 layer: deseasoned+detrended series next to the anomaly series
may_stack <- stack(deseasoned_precip[[461]], precip_anom[[461]])
raster_plot(input_series = may_stack, layout_vals = c(1,2), title = 'Deseason and Detrended (1) and Anomalies (2)')
From the comparisons it is clear that the precipitation anomalies are better at conveying precipitation trends across space and time.
# Extract the mean precipitation anomaly per study-area polygon
# (velox is much faster than raster::extract for long time series)
precip_mean_df <- velox(precip_anom)$extract(sp = study_area, fun = function(x) mean(x, na.rm = TRUE), df = TRUE) %>%
  as_tibble()
# Rename columns to the raster layer names ("Jan.1979", "Feb.1979", ...)
colnames(precip_mean_df) <- c('ID_sp', names(precip_anom))
# Reshape to long format and compute a centered 3-year (36-month) rolling
# mean. Two fixes over the original: (1) the window is computed WITHIN each
# polygon -- the ungrouped rollapply() rolled across polygon boundaries --
# and (2) the width is 36 months to match the "3-year" name/description
# (12 was a single year of monthly data).
precip_roll_mean_df <- precip_mean_df %>%
  mutate(stusps = as.data.frame(study_area)$stusps) %>%
  gather(key = key, value = precip_mean, -ID_sp, -stusps) %>%
  separate(key,
           into = c("months", 'year'),
           sep = "\\.") %>%
  mutate(year_mon = as.Date(as.yearmon(paste0(months, ' ', year)))) %>%
  arrange(ID_sp, year_mon) %>%           # rolling windows require time order
  group_by(ID_sp, stusps) %>%
  mutate(mean_3yr = rollapply(precip_mean, 36, mean, align = 'center', fill = NA)) %>%
  ungroup()
head(precip_roll_mean_df)
## # A tibble: 6 x 7
## ID_sp stusps months year precip_mean year_mon mean_3yr
## <int> <fct> <chr> <chr> <dbl> <date> <dbl>
## 1 1 CO Jan 1979 8.82 1979-01-01 NA
## 2 1 CO Feb 1979 -18.3 1979-02-01 NA
## 3 1 CO Mar 1979 21.0 1979-03-01 NA
## 4 1 CO Apr 1979 -9.89 1979-04-01 NA
## 5 1 CO May 1979 35.7 1979-05-01 NA
## 6 1 CO Jun 1979 6.02 1979-06-01 0.187
# Time series of the per-region rolling-mean anomalies with a loess smoother;
# one color per region (legend suppressed to keep the panel clean).
precip_mean_p <- precip_roll_mean_df %>%
  ggplot(aes(x = year_mon, y = mean_3yr, color = stusps, group = stusps)) +
  geom_point() +
  geom_smooth() +
  # Axis label now matches the plotted variable: mean_3yr is a 3-year
  # rolling mean (the original label said "2-year").
  xlab("Year") + ylab("3-year running mean of precipitation anomalies") +
  theme_pub() + theme(legend.position = "none")
precip_mean_p
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).